library(readr)
library(tidyverse)
library(ggplot2)
library(janitor)
library(tidycensus)
library(viridis) 
library(tscount)
library(yrbss)
library(zoo)
library(purrr)
library(gganimate)
library(usmap)
library(maps)
import_raw_data = FALSE

if(import_raw_data){

ss = read_csv("../../data_1/school_shootings.csv")
colnames(ss) = ss[1,]
ss = ss[-1,]%>%
  clean_names()%>%
  select(-c(na,na_2))%>%
  filter(reliability_score_1_5!=1)

ss2 = read_csv("../../data_1/ss_pt2.csv")%>%
  select(-X1)
colnames(ss2)=colnames(ss)[34:ncol(ss)]

ss[(1426:nrow(ss)),(34:ncol(ss))]=ss2

ss$shooter_age = as.numeric(ss$shooter_age)

write.csv(ss,"ss_data.csv")
}

Read in and clean data

# ss is csv file with school shooting data (individual incidents)
ss = read_csv("ss_data.csv")%>%
  select(-X1)%>%
  mutate(year = as.numeric(substr(date, (nchar(date)-3),nchar(date))))%>%
  mutate(state = ifelse(state == "St. Croix, US Virgin Islands","St.Croix",state))%>%
  distinct_at(.vars = c(1:24), .keep_all = T)%>%
  filter(category != "Accident")

Population data

# data set to go from state names to abbreviations

states = data.frame(state.name,state.abb)
dc = data.frame(matrix(ncol= 2, nrow = 1))
colnames(dc)=colnames(states)
dc$`state.name` = "District of Columbia"
dc$`state.abb` = "DC"

states = rbind(states,dc)
states$state.name = as.character(states$state.name)

if(import_raw_data){

# 1980-1990
pop8090_raw = read.delim("https://www2.census.gov/programs-surveys/popest/tables/1980-1990/state/asrh/st8090ts.txt" )%>%data.frame()
pop8090_raw = pop8090_raw[c(8:58,64:114),]%>%data.frame()
colnames(pop8090_raw)="var"
pop8090 = data.frame(pop8090_raw$var[1:51],pop8090_raw$var[52:102])

colnames(pop8090)=c("var1","var2")


pop8090 = pop8090%>%
  separate(var1, c("state",as.character(seq(1980,1984,by = 1))), extra = "merge")%>%
  separate(var2, c("state1",as.character(seq(1985,1990,by = 1))), extra = "merge")%>%
  select(c(state,starts_with("19")))
  
# 1970 - 1980
pop7080_raw = read.delim("https://www2.census.gov/programs-surveys/popest/tables/1980-1990/state/asrh/st7080ts.txt")
pop7080_raw = pop7080_raw[c(10:61,63:114),]%>%data.frame()
colnames(pop7080_raw)="var"
pop7080 = data.frame(pop7080_raw$var[1:51],pop7080_raw$var[53:103])

colnames(pop7080)=c("var1","var2")

pop7080 = pop7080%>%
  separate(var1, c("blnk","id","state","1970","1971","1972","1973","1974","1975"), extra = "merge")%>%
  separate(var2, c("blnk1","id1","state1",as.character(seq(1976,1980,by = 1))), extra = "merge")%>%
  select(c(state,starts_with("19")))

# 1990-2000
pop90_00 = read_csv("../../data/pop_90-00.csv")%>%
  rename(state = X1)%>%
  select(c(state,3:13))%>%
  filter(state != "USA")
colnames(pop90_00)[2:ncol(pop90_00)]=as.character(seq(1990,2000,by = 1))
pop90_00$state[30:35] = states$state.name[29:34]
pop90_00$state[40:42] = states$state.name[39:41]
pop90_00$state[9] = states$state.name[51]
pop90_00$state[49] = states$state.name[48]

# 2000-2010
pop00_10 = read.csv("https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/state/st-est00int-agesex.csv")%>%
  clean_names()%>%
  filter(sex == 0, age == 999)%>%
  select(c(name,starts_with("popestimate")))%>%
  rename_at(vars(starts_with("popestimate")),funs(substr(.,start = 12, stop = 15)))%>%
  rename(state = name)%>%
  mutate(state = as.character(state))%>%
  mutate(state = ifelse(state=="District of Columbia",as.character(states$state.name[51]),state))%>%
  filter(state != "United States")

# 2010-2018
pop10_18 = read_csv("../../data/pop_2010-2018.csv")
colnames(pop10_18)=pop10_18[1,]

pop10_18 = pop10_18%>%
  slice(-1)%>%
  rename_at(vars(starts_with("Pop")), funs(substr(.,start = 38,stop = 41)))%>%
  rename(state = Geography)%>%
  select(c(state,starts_with("20")))%>%
  mutate(state = ifelse(state=="District of Columbia",as.character(states$state.name[51]),state))

# 2019

pop2019 = read_csv("../../data/2019pop.csv")%>%
  clean_names()%>%
  select(c(state,pop))%>%
  rename("2019"=pop)



pops = left_join(pop7080,pop8090%>%select(-c("1980")), by = "state")%>%
  left_join(states,by = c("state"="state.abb"))%>%
  mutate(state = state.name)%>%
  select(-state.name)%>%
  left_join(pop90_00%>%select(-c("1990")), by = "state")%>%
  left_join(pop00_10%>%select(-c("2000")), by = "state")%>%
  left_join(pop10_18%>%select(-c("2010")), by = "state")%>%
  left_join(pop2019,by = "state")%>%
  gather(key = "year", value = "population", c(2:51))


write.csv(pops, "state_populations.csv")

}

Read in population data

# read in population data (include abbreviated and full state name)
pops = read_csv("state_populations.csv")%>%
  select(-X1)%>%
  left_join(states, by = c("state"="state.name"))

Incident count format

sts = unique(ss$state)
yrs = seq(range(ss$year)[1],range(ss$year)[2],by=1)

st_yr = data.frame(state = rep(sts,length(yrs)), year = sort(rep(yrs,length(sts))))%>%
  arrange(state)

## data set aggrgated by state and year
ss_counts = ss%>%
  group_by(state,year) %>%
  summarize(incident_count = n(), 
            total_victims = sum(total_injured_killed_victims), 
            total_fatalities = sum(killed_includes_shooter),
            total_wounded = sum(wounded),
            avg_victims = mean(total_injured_killed_victims),
            avg_fatalities = mean(killed_includes_shooter),
            ave_wounded = mean(wounded),
            avg_shooter_age = mean(shooter_age),
            max_shooter_age = max(shooter_age),
            min_shooter_age = min(shooter_age),
            avg_reliability = mean(reliability_score_1_5),
            target = mean(ifelse(targeted_specific_victim_s=="Y",1,
                                 ifelse(targeted_specific_victim_s=="N",0,NA)),na.rm = T),
            random_vict = mean(ifelse(random_victims=="Y",1,
                                      ifelse(random_victims=="N",0,NA)),na.rm = T),
            bullied = mean(ifelse(bullied_y_n_n_a=="Y",1,
                                  ifelse(bullied_y_n_n_a=="N",0,NA)),na.rm = T),
            domestic_violence = mean(ifelse(domestic_violence_y_n=="Y",1,
                                            ifelse(domestic_violence_y_n=="N",0,NA)),na.rm = T))%>%
  mutate(in_ss = TRUE)%>%
  full_join(st_yr, by = c("state","year"))%>%
  left_join(pops%>%rename(full_state_name = state), by = c("state"="state.abb", "year"))%>%
  mutate(incident_count = ifelse(is.na(incident_count),0,incident_count))%>%
  mutate(in_ss = ifelse(is.na(in_ss),FALSE,in_ss))

Grade data

# read in state grade data

if(import_raw_data){
grades18 = read_csv("../../data_1/2018grades.csv")%>%
  select(-c(X6,X7))%>%
  clean_names()%>%
  select(-gun_death_rate_per_100k)%>%
  mutate(year = "2018")%>%
  rename(grade = x2018_grade)%>%
  select(state,grade,year)

grades17 = read_csv("../../data_1/2017grades.csv")%>%
  clean_names()%>%
  mutate(year = "2017")%>%
  rename(grade = x2017_grade)%>%
  select(state,grade,year)

grades16 = read_csv("../../data_1/2016grades.csv")%>%
  clean_names()%>%
  mutate(year = "2016")%>%
  rename(grade = x2016_grade)%>%
  select(state,grade,year)


grades15 = read_csv("../../data_1/2015grades.csv")%>%
  clean_names()%>%
  mutate(year = "2015")%>%
  rename(grade = x2015_grade)%>%
  select(state,grade,year)
# add in missing value
grades15 = rbind(grades15,c("Kansas","F",2015))

grades14 = read_csv("../../data_1/2014grades.csv")%>%
  clean_names()%>%
  mutate(year = "2014")%>%
  rename(grade = x2014_grade)%>%
  select(state,grade,year)

grades13 = read_csv("../../data_1/2013grades.csv")%>%
  clean_names()%>%
  mutate(year = "2013")%>%
  rename(grade = x2013_grade)%>%
  select(state,grade,year)

grades12 = read_csv("../../data_1/2012grades.csv")%>%
  clean_names()%>%
  mutate(year = "2012")%>%
  rename(grade = x2012_grade)%>%
  select(state,grade,year)

# combine all year data 

grades = bind_rows(grades18,grades17)%>%
  bind_rows(grades16)%>%
  bind_rows(grades15)%>%
  bind_rows(grades14)%>%
  bind_rows(grades13)%>%
  bind_rows(grades12)

write.csv(grades,"state_grades.csv")
}

Read in grade data

grades = read_csv("state_grades.csv")%>%
  select(-X1)

gpa_convert = data.frame(letter_grade = c("A","A-","B+","B","B-",
                                          "C+","C","C-","D+","D","D-","F"),
                         gpa = c(4,3.7,3.3,3,2.7,2.3,2,1.7,1.3,1,0.7,0))

ss_counts = ss_counts%>%
  left_join(grades%>%select(state,grade,year),by = c("full_state_name"="state","year"))%>%
  left_join(gpa_convert, by = c("grade"="letter_grade"))

Media data

media = read_csv("media_data.csv")%>%
  separate(`Year-Month`, into = c("year","month"), sep = "-")%>%
  clean_names()%>%
  select(-starts_with("x"))%>%
  group_by(year) %>%
  mutate(yearly_articles = sum(articles_shootings_excluding_firearm_laws_and_regulations))%>% 
  mutate(yearly_shootings = sum(mass_shooting))%>%
  mutate(art_per_inc = yearly_articles/yearly_shootings)%>%
  ungroup()%>%
  mutate(prev_month_art = c(0,articles_shootings_excluding_firearm_laws_and_regulations[1:227]))%>%
  mutate(prev_year_art = c(rep(0,12),yearly_articles[1:216]))%>%
  mutate(year = as.numeric(year))


ss_counts = ss_counts%>%
  left_join(media%>%select(prev_year_art, year,yearly_articles)%>%distinct(), by = "year")

Poverty data

pov=read.csv("census_poverty_data.csv",stringsAsFactors = FALSE)
pov=filter(pov,State!="      United States"&State!="     United States"& State!="District of Columbia"&State!="United States")

names(pov)[1:2]<-c("state","poverty")
names(pov)[4]<-"year"

skel=expand_grid(sort(unique(pov$state)),min(pov$year):(max(pov$year)+2))
names(skel)<-c("state","year")



sk1=filter(skel,year==2007|year==2008|year==2009)
sk1=left_join(sk1,filter(pov,year==2007),by="state")

sk2=filter(skel,year==2010|year==2011|year==2012)
sk2=left_join(sk2,filter(pov,year==2010),by="state")

sk3=filter(skel,year==2013|year==2014|year==2015)
sk3=left_join(sk3,filter(pov,year==2013),by="state")

sk4=filter(skel,year==2016|year==2017|year==2018)
sk4=left_join(sk4,filter(pov,year==2016),by="state")

pov=rbind(sk1,sk2,sk3,sk4)
pov=pov[,1:3]
names(pov)<-c("state","year","poverty")

Merge poverty data

pov = left_join(pov,states, by = c("state"="state.name"))%>%
  select(-state)%>%
  rename(state = state.abb)
ss_counts = left_join(ss_counts, pov, by = c("state","year"))

Bullying data

bully = read_csv("State_bullying.csv")%>%
  select(c(total_score,state.abb))%>%
  rename(state = state.abb)%>%
  mutate(year = 2010)

ss_counts = left_join(ss_counts, bully%>%select(-year), by = "state")%>%
  rename(bullying_score = total_score)%>%
  mutate(bullying_score = as.numeric(bullying_score))

Mental Health data

mh.data=read.csv("mh.data.csv",stringsAsFactors = FALSE)%>%
  clean_names()%>%
  select(-x)%>%
  left_join(states, by = c("state"="state.name"))%>%
  select(-state)%>%
  rename(state = state.abb)

ss_counts = ss_counts%>%
  left_join(mh.data, by = c("state","year"))%>%
  rename(mh_score = percent)
## Warning: Column `state` joining character vector and factor, coercing into
## character vector

CDC Bullying Data

qns = c("qn13","qn14","qn15","qn16","qn17","qn18","qn19","qn20","qn24","qn25","qn26","qn27","qn28","qn29","qn30","qnbullyweight","qnbullygay")
sts = yrbss::getListOfParticipatingStates()
yrs = c(1991,1993,1995,1997,1999,2001,2003,2005,2007,2009,2011,2013,2015)

if(import_raw_data){
bullying = data.frame(variable = NULL, state = NULL, year = NULL, prop= NULL, ciLB= NULL, ciUB = NULL, n = NULL)

for(v in qns){
  for(s in sts){
    for(y in yrs ){
      p = yrbss::getProportionSingleVariable(.variable = v, .location = s, .year = y, .level = 0.95)$prop
    ciL = yrbss::getProportionSingleVariable(.variable = v, .location = s, .year = y, .level = 0.95)$ciLB
    ciU = yrbss::getProportionSingleVariable(.variable = v, .location = s, .year = y, .level = 0.95)$ciUB
    sample_size = yrbss::getProportionSingleVariable(.variable = v, .location = s, .year = y, .level = 0.95)$n
    newrow = data.frame(variable = v, state = s, year = y, prop= p, ciLB= ciL, ciUB = ciU, n = sample_size)
    bullying = bind_rows(bullying,newrow)}
}}

lab = data.frame(question = yrbss::yrbss_questions_binary$variable, label = yrbss::yrbss_questions_binary$label)%>%
   filter(question %in% qns)
 
bullying = left_join(bullying,lab, by = c(variable = "question"))
bullying$state[which(bullying$state=="AZB")]="AZ"
write_csv(bullying, "bullying_survey.csv")
}

lab = data.frame(question = yrbss::yrbss_questions_binary$variable, label = yrbss::yrbss_questions_binary$label)%>%
   filter(question %in% qns)
# read in csv
bullying_survey = read_csv("bullying_survey.csv")
## Parsed with column specification:
## cols(
##   variable = col_character(),
##   state = col_character(),
##   year = col_double(),
##   prop = col_double(),
##   ciLB = col_double(),
##   ciUB = col_double(),
##   n = col_double(),
##   label = col_character()
## )
# check missingness for 2009-2015
sum(!complete.cases(filter(bullying_survey, year>=2007)))/nrow(filter(bullying_survey, year>=2007))
## [1] 0.3135135
### We're going to need to impute values for missing years and states for some of these variables if these data are going to be useful in our analysis

Make combined variables for survey questions

# combine suicide questions 
suicide = data.frame(year = NULL, state = NULL, score = NULL, n = NULL)
suicide_qns = bullying_survey%>%
  filter(variable %in% c("qn26","qn27","qn28","qn29","qn30"))

sts[which(sts=="AZB")]="AZ"
for(y in yrs){
  for(st in sts){
    dat = filter(suicide_qns,(year ==y & state == st))
    suicide_score = weighted.mean(dat$prop,1/(dat$n), na.rm = TRUE)
    samp = mean(dat$n,na.rm = TRUE)
    newrow = data.frame(year = y, state = st, score = suicide_score, n = samp)
    suicide = bind_rows(suicide,newrow)
  }
}

# combine questions involving physical fights
fight = data.frame(year = NULL, state = NULL, score = NULL, n = NULL)
fight_qns = bullying_survey%>%
  filter(variable %in% c("qn18","qn19","qn20"))

for(y in yrs){
  for(st in sts){
    dat = filter(fight_qns,(year ==y & state == st))
    fight_score = weighted.mean(dat$prop,1/(dat$n), na.rm = TRUE)
    samp = mean(dat$n,na.rm = TRUE)
    newrow = data.frame(year = y, state = st, score = fight_score, n = samp)
    fight = bind_rows(fight,newrow)
  }
}

# school safety questions
safety = data.frame(year = NULL, state = NULL, score = NULL, n = NULL)
safety_qns = bullying_survey%>%
  filter(variable %in% c("qn13","qn14","qn15","qn16","qn17"))

for(y in yrs){
  for(st in sts){
    dat = filter(safety_qns,(year ==y & state == st))
    safety_score = weighted.mean(dat$prop,1/(dat$n), na.rm = TRUE)
    samp = mean(dat$n,na.rm = TRUE)
    newrow = data.frame(year = y, state = st, score = safety_score, n = samp)
    safety = bind_rows(safety,newrow)
  }
}

# bullying questions
bull = data.frame(year = NULL, state = NULL, score = NULL, n = NULL)
bully_qns = bullying_survey%>%
  filter(variable %in% c("qn24","qn25","qnbullyweight","qnbullygay"))

for(y in yrs){
  for(st in sts){
    dat = filter(bully_qns,(year ==y & state == st))
    bully_score = weighted.mean(dat$prop,1/(dat$n), na.rm = TRUE)
    samp = mean(dat$n,na.rm = TRUE)
    newrow = data.frame(year = y, state = st, score = bully_score, n = samp)
    bull = bind_rows(bull,newrow)
  }
}


survey_combined = bind_cols(suicide,safety, fight, bull)%>%
  select(year, state, score, n, score1, n1, score2, n2, score3, n3)%>%
  rename(suicide_score = score, school_safety_score = score1, fight_score = score2, bully_score = score3)%>%
  filter(year>=2007)%>%
  full_join(st_yr%>%filter(year>=2007))%>%
  arrange(year)%>%
  arrange(state)%>%
  mutate(state = ifelse(state == "AZB","AZ",state))

survey_combined = survey_combined%>%
  group_by(state)%>%
  mutate(suicide_score =ifelse(year<2016,ifelse(is.na(suicide_score), na.locf(suicide_score)/2+na.locf(suicide_score, fromLast = TRUE)/2,suicide_score),
                              NA ))%>%
  mutate(school_safety_score =ifelse(year<2016,ifelse(is.na(school_safety_score), na.locf(school_safety_score)/2+na.locf(school_safety_score, fromLast = TRUE)/2,school_safety_score),NA))%>%
    mutate(fight_score =ifelse(year<2016,ifelse(is.na(fight_score), na.locf(fight_score)/2+na.locf(fight_score, fromLast = TRUE)/2,fight_score),NA))%>%
      mutate(bully_score =ifelse(year<2016,ifelse(is.na(bully_score), na.locf(bully_score)/2+na.locf(bully_score, fromLast = TRUE)/2,bully_score),NA))%>%
        mutate(n=ifelse(year<2016,ifelse(is.na(n), na.locf(n)/2+na.locf(n, fromLast = TRUE)/2,n),NA))%>%
          mutate(n1=ifelse(year<2016,ifelse(is.na(n1), na.locf(n1)/2+na.locf(n1, fromLast = TRUE)/2,n1),NA))%>%
          mutate(n2=ifelse(year<2016,ifelse(is.na(n2), na.locf(n2)/2+na.locf(n2, fromLast = TRUE)/2,n2),NA))%>%       
  mutate(n3=ifelse(year<2016,ifelse(is.na(n3), na.locf(n3)/2+na.locf(n3, fromLast = TRUE)/2,n3),NA))


# use fitted line to get 2016-2019 values 
for(s in sts[which(!(sts %in% c("MA","AZB")))]){
  dat = filter(survey_combined, state ==s)
  ind = which(is.na(dat$suicide_score))[1]
  slp = dat[ind-1,3:10]-dat[ind-2,3:10]
  dat[which(is.na(dat$suicide_score))[1],3:10] =  dat[(ind-1),3:10]+as.numeric(dat[which(is.na(dat$suicide_score))[1],1]-2015)*slp
    dat[which(is.na(dat$suicide_score))[1],3:10] =  dat[(ind-1),3:10]+as.numeric(dat[which(is.na(dat$suicide_score))[1],1]-2015)*slp  
    dat[which(is.na(dat$suicide_score))[1],3:10] =  dat[(ind-1),3:10]+as.numeric(dat[which(is.na(dat$suicide_score))[1],1]-2015)*slp  
    dat[which(is.na(dat$suicide_score))[1],3:10] =  dat[(ind-1),3:10]+as.numeric(dat[which(is.na(dat$suicide_score))[1],1]-2015)*slp

  survey_combined[which(survey_combined$state == s),]=dat
}

# chech to see if mean and median are different
survey_combined%>%
  group_by(year)%>%
  summarise(med_ss = median(suicide_score, na.rm = T), mn_ss = mean(suicide_score, na.rm = T),
            med_safe = median(school_safety_score, na.rm = T), mn_safe = mean(school_safety_score, na.rm = T),
            med_f = median(fight_score, na.rm = T), mn_f = mean(fight_score, na.rm = T),
            med_b = median(bully_score, na.rm = T), mn_b = mean(bully_score, na.rm = T))
## # A tibble: 13 x 9
##     year med_ss mn_ss med_safe mn_safe  med_f   mn_f med_b  mn_b
##    <dbl>  <dbl> <dbl>    <dbl>   <dbl>  <dbl>  <dbl> <dbl> <dbl>
##  1  2007  0.118 0.123   0.0913  0.0885 0.154  0.155  0.195 0.198
##  2  2008  0.119 0.124   0.0880  0.0872 0.152  0.154  0.195 0.198
##  3  2009  0.123 0.124   0.0843  0.0852 0.145  0.153  0.195 0.198
##  4  2010  0.126 0.125   0.0827  0.0851 0.144  0.146  0.186 0.187
##  5  2011  0.121 0.125   0.0836  0.0843 0.137  0.143  0.185 0.187
##  6  2012  0.129 0.132   0.0827  0.0855 0.128  0.136  0.182 0.187
##  7  2013  0.134 0.137   0.0845  0.0893 0.122  0.127  0.183 0.187
##  8  2014  0.140 0.140   0.0847  0.0869 0.109  0.121  0.189 0.185
##  9  2015  0.143 0.142   0.0810  0.0836 0.105  0.113  0.189 0.185
## 10  2016  0.148 0.144   0.0756  0.0803 0.103  0.106  0.189 0.184
## 11  2017  0.148 0.146   0.0738  0.0770 0.100  0.0985 0.189 0.184
## 12  2018  0.148 0.149   0.0715  0.0737 0.0937 0.0910 0.189 0.184
## 13  2019  0.148 0.151   0.0713  0.0704 0.0887 0.0836 0.189 0.184
# impute missing values with median value for each column

survey_combined = survey_combined%>%
  ungroup()%>%
  group_by(year)%>%
  mutate(suicide_score = ifelse(is.na(suicide_score), median(suicide_score, na.rm = T),suicide_score))%>%
  mutate(school_safety_score = ifelse(is.na(school_safety_score), median(school_safety_score, na.rm = T),school_safety_score))%>%
  mutate(fight_score = ifelse(is.na(fight_score), median(fight_score, na.rm = T),fight_score))%>%
  mutate(bully_score = ifelse(is.na(bully_score), median(bully_score, na.rm = T),bully_score))%>%
  mutate(n = ifelse(is.na(n), median(n, na.rm = T),n))%>%
  mutate(n1 = ifelse(is.na(n1), median(n1, na.rm = T),n1))%>%
  mutate(n2 = ifelse(is.na(n2), median(n2, na.rm = T),n2))%>%
  mutate(n3 = ifelse(is.na(n3), median(n3, na.rm = T),n3))%>%
  mutate(school_safety_score = ifelse(school_safety_score<0,0,school_safety_score))%>%
  mutate(fight_score = ifelse(fight_score<0,0,fight_score))

ss_counts = left_join(ss_counts, survey_combined, by = c("state","year"))%>%
  filter(state != "DC")
rand.count.data = read_csv("cleaned.rand.csv")%>%
  select(-X1)%>%
  left_join(states, by = c("state"= "state.name"))%>%
  select(-state)%>%
  rename(state = state.abb)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   state = col_character()
## )
## See spec(...) for full column specifications.
ss_counts = left_join(ss_counts,rand.count.data, by = c("state","year"))
## Warning: Column `state` joining character vector and factor, coercing into
## character vector
ss_counts = ss_counts%>%
  mutate(access_laws = -1*(age_permissive-age_restrictive+child_access_permissive-child_access_restrictive+prohibited_possessor_permissive-prohibited_possessor_restrictive))%>%
  mutate(transport= -1*(castle_doctrine_permissive-castle_doctrine_restrictive+open_carry_permissive-open_carry_restrictive+carrying_permissive-carrying_permissive))%>%
  mutate(screening = -1*(background_check_permissive- background_check_restrictive + waiting_period_permissive- waiting_period_restrictive + permit_permissive - permit_restrictive + registration_permissive - registration_restrictive + safety_training_permissive - safety_training_restrictive + dealer_license_permissive - dealer_license_restrictive + one_gun_per_permissive - one_gun_per_restrictive))%>%
  mutate(ban = -ban_permissive+ban_restrictive)%>%
  select(-c(36:63))

EDA

table(ss$reliability_score_1_5)%>%
  data.frame()%>%
  ggplot(aes(x = Var1, y = Freq))+
  geom_bar(aes(fill = Var1), stat = "identity")+
  scale_fill_discrete(name = "Reliability Score")+
  labs(x = "Reliability Score", y = "Count")

table(ss$school_type)%>%
  data.frame()%>%
  ggplot(aes(x = Var1, y = Freq))+
  geom_bar(aes(fill = Var1), stat = "identity")+
  scale_fill_discrete(name = "School Type")+
  labs(x = "School Type", y = "Count")

ss%>%
  filter(shooter_age<200)%>%
  ggplot(aes(x = reorder(state, shooter_age, FUN = mean), y = shooter_age))+
  geom_boxplot()+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))+
  labs(y = "Age of shooter", x = "State", title = "Age of Shooter by State")

ss%>%
  ggplot(aes(x = reorder(state, killed_includes_shooter, FUN = mean), y = killed_includes_shooter))+
  geom_boxplot()+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))+
  labs(y = "Numbeer Killed (including shooter)", x = "State", title = "Fatalities")

ss%>%
  ggplot(aes(x = reorder(state, wounded, FUN = mean), y = total_injured_killed_victims))+
  geom_boxplot()+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))+
  labs(y = "Wounded ", x = "State", title = "Number Wounded")

ss%>%
  ggplot(aes(x = reorder(state, total_injured_killed_victims, FUN = mean), y = total_injured_killed_victims))+
  geom_boxplot()+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))+
  labs(y = "Wounded or Killed", x = "State", title = "Combined Wounded & Fatalities")

table(ss$state)%>%
  data.frame()%>%
  ggplot(aes(x = Var1, y = Freq))+
  geom_bar(aes(x = reorder(Var1,Freq, desc)), stat = "identity")+
  labs(x = "State", y = "Count", "Number of incidents by State (overall)")+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))

Grade plots

cc = scales::seq_gradient_pal("green", "red", "Lab")(seq(0,1,length.out=12))
cc2 = scales::seq_gradient_pal("green", "red", "Lab")(seq(0,1,length.out=5))


# This will handle the ordering
d <- ss_counts %>% 
  filter(year %in% seq(2012,2018,by=1))%>%
  mutate(grade = substr(grade,1,1))%>%
  ungroup() %>%   # As a precaution / handle in a separate .grouped_df method
  arrange(year, grade) %>%   # arrange by facet variables and continuous values
  mutate(.r = row_number()) # Add a row number variable

ggplot(d, aes(x = .r, y= incident_count, fill = grade)) +  # Use .r instead of x
  geom_point(data = d%>%filter(incident_count==0),
             aes(x = .r, y= incident_count, color = grade),
             size = 0.7) +
  geom_bar(stat = "identity")+
  facet_wrap(~ year, scales = "free_x") +  # Should free scales (though could be left to user)
  scale_x_continuous(  # This handles replacement of .r for x
    breaks = d$.r,     # notice need to reuse data frame
    labels = d$state
  )+
  labs(title = "Incident Count by Grade", x = "State", y = "School Shootings")+
  scale_fill_viridis_d( name = "Grade", direction = -1)+
  scale_color_viridis_d( guide = FALSE, direction = -1)+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 3.5), legend.position = c(0.7,.15), legend.direction = "horizontal",legend.key.size = unit(1.5,"line"), legend.text = element_text(size = 8), legend.title = element_text(size = 10),plot.title = element_text(hjust = 0.5))

### Outcome plot: 
ss_counts%>%
  group_by(year)%>%
  summarise(ss = sum(incident_count))%>%
  ggplot(aes(x = year, y = ss))+
  geom_line(color = "red")+
  labs(x = "Year", y = "School Shootings", title = "School Shootings in the U.S.")+
  theme_bw()+
  theme(plot.title = element_text(hjust = 0.5))

ss_counts%>%
  ggplot(aes(x = reorder(state, population, FUN = mean), y = incident_count, color = state))+
  geom_boxplot()+
  theme_bw()+
  coord_flip()+
  theme(legend.position = "none")

grades = left_join(grades, gpa_convert, by = c("grade"="letter_grade"))

dat_loc= map_data("state")%>%
  mutate(sts=toupper(region))

b=full_join(grades%>%mutate(state = tolower(state)), dat_loc, by=c("state"="region"))
b = b%>%mutate(year = as.numeric(year))%>%filter(!is.na(year))%>%select(-subregion)
b = b[complete.cases(b),]

map_plts = ggplot(data = b, aes(x = long, y = lat, group = group))+
  geom_polygon(aes(fill = gpa), color = "white")+
  scale_fill_viridis(option = "D", name = "GPA")+
  labs(title = "Grade Changes by Year", subtitle = 'Year: {frame_time}', x = NULL, y = NULL)+
  theme_bw()+
  theme(line = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank(), plot.title = element_text(hjust = 0.5))+
  transition_time(year, range = c(2012,2018))

animate(map_plts, nframes = 7, fps = 1)
anim_save("state_grade_animation.GIF")

dat_loc = left_join(dat_loc,states%>%mutate(state.name = tolower(state.name)), by = c("region"="state.name"))
b2=full_join(ss_counts%>%ungroup()%>%filter(year>=2007)%>%filter(!(state%in%c("AK","HI")))%>%select(state,year,access_laws,transport,screening,ban), dat_loc, by=c("state"="state.abb"))%>%
  select(-subregion)%>%
  gather(key = "law_class", value = "number_in_effect", c(3:6))
b2 = b2[complete.cases(b2),]


law.labs <- c("Access Laws", "Bans","Screening Laws", "Transport Laws")
names(law.labs) <- c("access_laws", "ban","screening","transport")

map_plts_law = ggplot(data = b2, aes(x = long, y = lat, group = group))+
  geom_polygon(aes(fill = number_in_effect), color = "white")+
  scale_fill_viridis(option = "D", name = "Laws in\nEffect\n(R-P)")+
  labs(title = "Law Changes by Year",subtitle = 'Year: {frame_time}', x = NULL, y = NULL)+
  theme_bw()+
  facet_wrap(.~law_class, labeller = labeller(law_class = law.labs))+
  theme(line = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank(), plot.title = element_text(hjust = 0.5))+
  transition_time(year, range = c(2007,2019))

animate(map_plts_law, nframes = 13, fps = 1)

anim_save("state_law_animation.GIF")
ss_counts%>%
  filter(year %in% seq(2012,2018,by=1))%>%
  filter(!is.na(grade))%>%
  ggplot(aes(x = incident_count , fill = grade))+
  geom_histogram(binwidth = 1)+
  facet_wrap(.~year)+
  scale_fill_manual(values = cc)+
  scale_x_continuous(breaks = seq(0,35,by=5))+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 3.5))+
  theme_bw()+
  labs(x= "Number of School Shootings")

ss_counts%>%
  filter(year %in% seq(2012,2018,by=1))%>%
  filter(!is.na(grade))%>%
  ggplot(aes(x = grade, y = incident_count, color = grade))+
  geom_boxplot()+
  facet_wrap(.~year)+
  theme_bw()+
  scale_color_manual(values = cc)+
  labs(y= "Number of School Shootings", x = "Grade")

ss_counts%>%
  filter(year %in% seq(2012,2018,by=1))%>%
  ggplot()+
  geom_bar(aes(x = reorder(state,population), y = incident_count), stat = "identity")+
  geom_point(aes(x = state, y = log(population), color = "population"), size = .4)+
  theme_bw()+
  labs(x = "State", y = "Number of School Shootings", title = "Number of School Shooting and Population by State")+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 3.5), legend.position = c(.7, 0), legend.justification = c(1, 0))+
  facet_wrap(.~year)+
  scale_color_discrete(name = NULL, labels = "Population (log)")

ss_counts%>%
  filter(year %in% seq(2012,2018,by=1))%>%
  group_by(state) %>%
  mutate(ordr = mean(avg_victims, na.rm = T))%>%
  filter(ordr!="NaN")%>%
  ungroup()%>%
  arrange(desc(ordr))%>%
  ggplot(aes(x = reorder(state,ordr), y = avg_victims, color = year))+
  geom_point(size = 0.6, position = "jitter")+
  labs(x = "State", y = "Average number of victims per incident", title ="Victims per School Shooting")+
  scale_color_viridis(option = "D")+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))

Media Plots

media%>%
  ggplot(aes(x = prev_month_art, y = mass_shooting, color = as.numeric(year)))+geom_point(position = "jitter")+
  scale_color_continuous(name = "Year")+
  labs(y = "Mass Shooting Count", x = "Articles in Previous Month")+
  theme_bw()

media%>%
  ggplot(aes(y = articles_shootings_excluding_firearm_laws_and_regulations, x= as.numeric(year), color = as.numeric(year)))+
  geom_point()+
  geom_jitter()+
  labs(y = "Monthly Articles",x =  "Year")+
  theme(legend.position = "none")+
  theme_bw()

media%>%
  ggplot(aes(y = articles_shootings_excluding_firearm_laws_and_regulations/yearly_shootings, x= as.numeric(year), color = as.numeric(year)))+
  geom_point()+
  geom_jitter()+
  labs(y = "Monthly Articles/Yearly Mass Shootings",x =  "Year")+
  theme(legend.position = "none")+
  theme_bw()

media%>%
  ggplot(aes(y = yearly_articles, x= as.numeric(year)))+ 
  geom_point()+
  labs(y = "Yearly Articles",x =  "Year")+
  theme_bw()

school_counts = ss%>%
  select(school,city,state)%>%
  unite(col = school_city,c(school,city,state), sep = "_")%>%
  table(dnn = c("school","count"))%>%
  data.frame()%>%
  separate(col = school_city, into = c("school","city","state"), sep = "_" )


delta = data.frame(state = NULL, year = NULL, delta_gpa = NULL, delta_ss = NULL, delta_pop = NULL)
for(x in unique(ss_counts$state)){
  dat = filter(ss_counts, state == x)%>%
    arrange(year)
  chng_gpa = c(NA,diff(dat$gpa))
  chng_ss = c(NA, diff(dat$incident_count))
  chng_pop = c(NA, diff(dat$population))
  st = rep(x, nrow(dat))
  newrows =data.frame(state = st, year = dat$year, delta_gpa = chng_gpa, delta_ss = chng_ss, delta_pop= chng_pop)
  delta = bind_rows(delta,newrows)
  
}
ss_counts%>%
  group_by(year)%>%
  summarise(yearly_incidents = sum(incident_count), prev_year_art = prev_year_art[1] )%>%
  ggplot(aes(x = year, y = yearly_incidents, fill = prev_year_art))+geom_bar(stat = "identity")+
  labs(y = "Yearly School Shootings", x = "Year")+
  scale_fill_viridis(option = "D", name = "Articles in previous year")+
  theme_bw()

Bullying survey plots

ggplot(data= bullying_survey%>%filter(year>=2013), aes(x = year, y = prop, grouby_by = state, color = variable))+
  geom_line(position = "jitter")+
  scale_color_viridis_d()+
  theme_bw()

ggplot(data= bullying_survey%>%filter(variable %in% qns[9:17]), aes(x = year, y = prop, color = variable))+
  geom_point(position = "jitter")+
  geom_smooth(se=F)+
  scale_color_viridis_d()+
  theme_bw()

lab = data.frame(question = yrbss::yrbss_questions_binary$variable, label = yrbss::yrbss_questions_binary$label)%>%
   filter(question %in% qns)

kableExtra::kable(lab, byrow = F, caption = "Questions topics")
Questions topics
question label
qn13 Carried a weapon
qn14 Carried a gun
qn15 Carried a weapon on school property
qn16 Did not go to school because they felt unsafe at school or on their way to or from school
qn17 Were threatened or injured with a weapon on school property
qn18 Were in a physical fight
qn19 Were injured in a physical fight
qn20 Were in a physical fight on school property
qn24 Were bullied on school property
qn25 Were electronically bullied
qn26 Felt sad or hopeless
qn27 Seriously considered attempting suicide
qn28 Made a plan about how they would attempt suicide
qn29 Attempted suicide
qn30 Attempted suicide that resulted in an injury, poisoning, or overdose that had to be treated by a doctor or nurse
qnbullyweight Been teased b/c of weight past 12 mos
qnbullygay Been teased b/c labeled GLB past 12 mos
bullying_survey%>%
  slice(which(complete.cases(bullying_survey)))%>%
  select(label)%>%
  table()%>%
  data.frame()%>%
  kableExtra::kable(caption = "Observations per Question")
Observations per Question
. Freq
Attempted suicide 298
Attempted suicide that resulted in an injury, poisoning, or overdose that had to be treated by a doctor or nurse 282
Been teased b/c labeled GLB past 12 mos 18
Been teased b/c of weight past 12 mos 18
Carried a gun 231
Carried a weapon 275
Carried a weapon on school property 292
Did not go to school because they felt unsafe at school or on their way to or from school 298
Felt sad or hopeless 251
Made a plan about how they would attempt suicide 291
Seriously considered attempting suicide 308
Were bullied on school property 124
Were electronically bullied 94
Were in a physical fight 298
Were in a physical fight on school property 294
Were injured in a physical fight 270
Were threatened or injured with a weapon on school property 291
surv.labs = c("Bullying Score", "Physical Altercation Score", "School Danger Perception Score", "Suicidal Thoughts or Actions Score")
names(surv.labs)= c("bully_score", "fight_score", "school_safety_score","suicide_score")

survey_combined%>%
  gather(key = score_type, percent, c(3,5,7,9))%>%
  ggplot(aes(x = state, y = percent, color = score_type ))+
  geom_boxplot()+
  scale_color_viridis_d()+
  labs(title = "Survey Reponses by State", x = "State", y = "Percent Answering 'Yes'")+
  facet_wrap(.~score_type,labeller = labeller(score_type = surv.labs))+
  theme_bw()+
  theme(legend.position = "none")+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 4.5))

Model Fitting

fit_grade = glm(incident_count~gpa+poverty+bullying_score+ prev_year_art+year+suicide_score+fight_score+school_safety_score+bully_score+ offset(log(population)), family = "poisson", data = ss_counts)

summary(fit_grade)
## 
## Call:
## glm(formula = incident_count ~ gpa + poverty + bullying_score + 
##     prev_year_art + year + suicide_score + fight_score + school_safety_score + 
##     bully_score + offset(log(population)), family = "poisson", 
##     data = ss_counts)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0505  -0.8770  -0.4999   0.4891   2.2763  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -7.380e+01  1.163e+02  -0.635    0.526    
## gpa                 -3.174e-01  6.986e-02  -4.543 5.54e-06 ***
## poverty             -3.746e-02  2.989e-02  -1.253    0.210    
## bullying_score       8.424e-02  5.126e-02   1.644    0.100    
## prev_year_art        5.150e-04  3.362e-04   1.532    0.126    
## year                 2.840e-02  5.772e-02   0.492    0.623    
## suicide_score        3.317e+00  4.734e+00   0.701    0.484    
## fight_score         -3.497e+00  2.584e+00  -1.353    0.176    
## school_safety_score -3.752e+00  3.997e+00  -0.939    0.348    
## bully_score          2.039e+00  3.411e+00   0.598    0.550    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 314.9  on 299  degrees of freedom
## Residual deviance: 266.2  on 290  degrees of freedom
##   (2200 observations deleted due to missingness)
## AIC: 590.52
## 
## Number of Fisher Scoring iterations: 5
fit_law = glm(reformulate(names(ss_counts)[c(23,25,26,28,30,32,34,36:39)],names(ss_counts)[3]), offset=log(population), family = "poisson", data = ss_counts)
summary(fit_law)
## 
## Call:
## glm(formula = reformulate(names(ss_counts)[c(23, 25, 26, 28, 
##     30, 32, 34, 36:39)], names(ss_counts)[3]), family = "poisson", 
##     data = ss_counts, offset = log(population))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5096  -0.8218  -0.5261   0.3507   3.0972  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.569e+01  1.092e+00 -14.364  < 2e-16 ***
## prev_year_art        5.241e-04  2.221e-04   2.359  0.01831 *  
## poverty             -2.733e-02  2.226e-02  -1.228  0.21942    
## bullying_score       6.512e-02  4.202e-02   1.550  0.12125    
## suicide_score        6.863e+00  4.031e+00   1.703  0.08863 .  
## school_safety_score -4.545e+00  3.744e+00  -1.214  0.22469    
## fight_score         -5.625e+00  2.001e+00  -2.810  0.00495 ** 
## bully_score         -2.167e+00  2.887e+00  -0.751  0.45291    
## access_laws         -7.169e-02  2.706e-02  -2.650  0.00806 ** 
## transport            8.523e-02  5.206e-02   1.637  0.10160    
## screening            4.415e-03  2.902e-02   0.152  0.87908    
## ban                 -2.192e-01  1.212e-01  -1.808  0.07054 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 560.79  on 549  degrees of freedom
## Residual deviance: 485.98  on 538  degrees of freedom
##   (1950 observations deleted due to missingness)
## AIC: 966.4
## 
## Number of Fisher Scoring iterations: 5
z_fit = pscl::zeroinfl(reformulate(names(ss_counts)[c(23,25,26,28,30,32,34,36:39)],names(ss_counts)[3]), offset=log(population), dist  = "poisson", data = ss_counts)
summary(z_fit)
## 
## Call:
## pscl::zeroinfl(formula = reformulate(names(ss_counts)[c(23, 25, 
##     26, 28, 30, 32, 34, 36:39)], names(ss_counts)[3]), data = ss_counts, 
##     offset = log(population), dist = "poisson")
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7744 -0.5866 -0.2949  0.2511  3.9460 
## 
## Count model coefficients (poisson with log link):
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.615e+01  1.124e+00 -14.370  < 2e-16 ***
## prev_year_art        1.657e-04  2.129e-04   0.778  0.43631    
## poverty             -5.031e-02  2.362e-02  -2.130  0.03318 *  
## bullying_score       4.751e-02  4.317e-02   1.101  0.27105    
## suicide_score        7.688e+00  4.212e+00   1.825  0.06795 .  
## school_safety_score  2.312e+00  4.132e+00   0.559  0.57586    
## fight_score         -6.433e+00  2.017e+00  -3.189  0.00143 ** 
## bully_score          2.701e+00  3.122e+00   0.865  0.38687    
## access_laws         -7.835e-02  2.716e-02  -2.885  0.00392 ** 
## transport            1.149e-01  5.516e-02   2.083  0.03728 *  
## screening           -1.614e-02  2.988e-02  -0.540  0.58915    
## ban                 -8.817e-02  1.226e-01  -0.719  0.47209    
## 
## Zero-inflation model coefficients (binomial with logit link):
##                      Estimate Std. Error z value Pr(>|z|)  
## (Intercept)         -23.87916   13.58751  -1.757   0.0788 .
## prev_year_art        -0.02227    0.01072  -2.077   0.0378 *
## poverty              -0.43610    0.29507  -1.478   0.1394  
## bullying_score       -0.41529    0.35063  -1.184   0.2363  
## suicide_score        95.20197   55.58781   1.713   0.0868 .
## school_safety_score  92.93063   57.04815   1.629   0.1033  
## fight_score          -8.05032   32.49863  -0.248   0.8044  
## bully_score         140.32318   72.99385   1.922   0.0546 .
## access_laws          -0.47490    0.37900  -1.253   0.2102  
## transport            -1.24559    1.05967  -1.175   0.2398  
## screening            -1.44754    0.69171  -2.093   0.0364 *
## ban                   4.11096    2.84593   1.445   0.1486  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 197 
## Log-likelihood: -454.3 on 24 Df
fit_law_victims = MASS::glm.nb(total_victims ~ prev_year_art + poverty + bullying_score + suicide_score + school_safety_score + fight_score + bully_score + access_laws + 
    transport + screening + ban+ offset(log(population)), data = ss_counts)
summary(fit_law_victims)
## 
## Call:
## MASS::glm.nb(formula = total_victims ~ prev_year_art + poverty + 
##     bullying_score + suicide_score + school_safety_score + fight_score + 
##     bully_score + access_laws + transport + screening + ban + 
##     offset(log(population)), data = ss_counts, init.theta = 2.051095288, 
##     link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4073  -0.8123  -0.2356   0.3870   3.4578  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.716e+01  1.327e+00 -12.930  < 2e-16 ***
## prev_year_art       -4.840e-05  2.873e-04  -0.168 0.866212    
## poverty             -8.310e-02  2.897e-02  -2.869 0.004122 ** 
## bullying_score       2.595e-02  5.099e-02   0.509 0.610811    
## suicide_score        1.574e+01  5.004e+00   3.144 0.001664 ** 
## school_safety_score -7.495e+00  5.781e+00  -1.296 0.194824    
## fight_score          6.526e+00  2.518e+00   2.592 0.009553 ** 
## bully_score          7.261e+00  3.837e+00   1.892 0.058450 .  
## access_laws         -1.142e-01  3.183e-02  -3.586 0.000336 ***
## transport            1.805e-01  7.041e-02   2.563 0.010382 *  
## screening            7.615e-02  3.493e-02   2.180 0.029274 *  
## ban                 -3.440e-01  1.505e-01  -2.285 0.022311 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(2.0511) family taken to be 1)
## 
##     Null deviance: 247.01  on 195  degrees of freedom
## Residual deviance: 195.70  on 184  degrees of freedom
##   (2304 observations deleted due to missingness)
## AIC: 804.12
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  2.051 
##           Std. Err.:  0.364 
## 
##  2 x log-likelihood:  -778.117
1-pchisq(195.70 , df = 184)
## [1] 0.2636877
plot(DHARMa::simulateResiduals(fit_law))

if(FALSE){
ts_dat = ss_counts%>%
  ungroup()%>%
  select(-c(4:17,23:24,27))%>%
  select(-starts_with("n"))%>%
  filter(year>=2007)%>%
  filter(year<2019)
ts_y = ts(ts_dat%>%ungroup()%>%select(incident_count))

ts_x = ts_dat%>%
  select(c(6,9:18))%>%
  mutate(population = log(population))%>%
  as.matrix()

ts_fit = tsglm(ts_y, xreg = ts_x, model = list(past_obs = 2, external = c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,TRUE,TRUE)), link = "log", distr = "poisson")

best_fit = forecast::auto.arima(diff(ts_y), xreg = diff(ts_x))

astsa::sarima(diff(ts_y),p = 2, d = 0, q =2, xreg = diff(ts_x))
arima(diff(ts_y),order = c(2,0,2), xreg = diff(ts_x))$coef
sqrt(diag(vcov( arima(diff(ts_y),order = c(0,0,0), xreg = diff(ts_x)))))

cilb= arima(diff(ts_y),order = c(0,0,0), xreg = diff(ts_x))$coef-sqrt(diag(vcov( arima(diff(ts_y),order = c(0,0,0), xreg = diff(ts_x)))))

ciub = arima(diff(ts_y),order = c(0,0,0), xreg = diff(ts_x))$coef+sqrt(diag(vcov( arima(diff(ts_y),order = c(0,0,0), xreg = diff(ts_x)))))
data.frame(Lower= cilb, Upper = ciub)
}